library(here)
library(tidyverse)
library(cowplot)
library(GGally)
theme_set(theme_light(base_size = 14))
library(scuttle)
library(scater)
library(scran)
library(kableExtra)
library(DDCompanion)
library(iCOBRA)## Directory setup
here_root <- "benchmarks/covid"
here::i_am(file.path(here_root, "analysis/covid-sim-results.Rmd"))
#> here() starts at /kyukon/data/gent/vo/000/gvo00063/Jeroen/DD_project_v2
data_dir <- here::here(here_root, "data")
stopifnot(dir.exists(data_dir))
res_dir <- here::here(here_root, "results")
stopifnot(dir.exists(res_dir))methods: bGLM, qbGLM, qbGLM_offset, qbGLM_offset_squeeze, edgeR_NB, edgeR_QP, edgeR_NB_optim, edgeR_QP_optimcelltype: immature_B_cell_Healthyprop_DE: 0.05datatype: pbres_files <- map(prop_DE, ~ get_sim_res_files(dataset = "covid",
methods = methods,
datatype = datatype,
prop_DE = .x,
celltype = celltype
)) %>% set_names(paste0("prop_DE_", prop_DE))
#res_list <- map_depth(res_files, 2, readRDS)
res_list <- lapply(res_files, function(element){
lapply(element, readRDS)
})
## Check for expected data structure
stopifnot(unlist(map_depth(res_list, 3, ~ is.data.frame(.x$results))))sce_files <- map(prop_DE, ~ get_SCE_files(
dataset = "covid",
which = "sim_replicates", prop_DE = .x,
celltype = celltype
)) %>% set_names(paste0("prop_DE_", prop_DE))
sce_objects <- lapply(sce_files, readRDS)prop_DE level. I.e. prop_DE_0.1$replicate1 and prop_DE_0.05$replicate1 will have the same mock group assignmentmap_dfr(sce_objects,
~ map_dfr(.x, function(x) c(nrows = nrow(x), ncols = ncol(x)),
.id = "replicate"
),
.id = "prop_DE"
)Subjects are divided across mock groups as follows:
lapply(sce_objects[[1]], function(x) table(x$donor_id, x$mock_group))
#> $replicate_1
#>
#> A B
#> CV0902 26 0
#> CV0904 21 0
#> CV0926 0 24
#> CV0929 0 39
#> CV0934 20 0
#> CV0939 0 24
#> CV0940 21 0
#> MH8919178 77 0
#> MH8919179 0 22
#> MH8919227 27 0
#> MH8919282 0 55
#> MH8919283 0 61
#> MH8919332 21 0
#> MH8919333 0 93
#> newcastle65 22 0
#> newcastle74 0 27
#>
#> $replicate_2
#>
#> A B
#> CV0902 0 26
#> CV0904 21 0
#> CV0926 0 24
#> CV0929 0 39
#> CV0934 0 20
#> CV0939 24 0
#> CV0940 21 0
#> MH8919178 77 0
#> MH8919179 22 0
#> MH8919227 0 27
#> MH8919282 0 55
#> MH8919283 0 61
#> MH8919332 21 0
#> MH8919333 93 0
#> newcastle65 22 0
#> newcastle74 0 27
#>
#> $replicate_3
#>
#> A B
#> CV0902 0 26
#> CV0904 21 0
#> CV0926 0 24
#> CV0929 39 0
#> CV0934 0 20
#> CV0939 24 0
#> CV0940 0 21
#> MH8919178 0 77
#> MH8919179 22 0
#> MH8919227 27 0
#> MH8919282 55 0
#> MH8919283 0 61
#> MH8919332 0 21
#> MH8919333 93 0
#> newcastle65 22 0
#> newcastle74 0 27
#>
#> $replicate_4
#>
#> A B
#> CV0902 0 26
#> CV0904 0 21
#> CV0926 24 0
#> CV0929 39 0
#> CV0934 0 20
#> CV0939 0 24
#> CV0940 21 0
#> MH8919178 0 77
#> MH8919179 0 22
#> MH8919227 0 27
#> MH8919282 0 55
#> MH8919283 61 0
#> MH8919332 0 21
#> MH8919333 93 0
#> newcastle65 22 0
#> newcastle74 27 0
#>
#> $replicate_5
#>
#> A B
#> CV0902 0 26
#> CV0904 0 21
#> CV0926 24 0
#> CV0929 39 0
#> CV0934 0 20
#> CV0939 0 24
#> CV0940 21 0
#> MH8919178 77 0
#> MH8919179 0 22
#> MH8919227 27 0
#> MH8919282 0 55
#> MH8919283 0 61
#> MH8919332 21 0
#> MH8919333 0 93
#> newcastle65 22 0
#> newcastle74 0 27The number of DE and non-DE genes per replicate:
map(sce_objects, ~ map_dfr(.x, ~ table(rowData(.x)$is_DE), .id = "replicate"))
#> $prop_DE_0.05
#> # A tibble: 5 × 3
#> replicate `FALSE` `TRUE`
#> <chr> <table[1d]> <table[1d]>
#> 1 replicate_1 11575 608
#> 2 replicate_2 11574 609
#> 3 replicate_3 11574 609
#> 4 replicate_4 11575 608
#> 5 replicate_5 11574 609tSNE_plots <- map(sce_objects, function(x) {
p <- imap(x, function(sce, name) {
plotTSNE(sce, colour_by = "mock_group") +
ggtitle(name)
})
patchwork::wrap_plots(p, ncol = 3, guides = "collect")
})## Get runtimes for each celltype
runtimes <- map(res_list, ~ map_dfr(.x, get_runtimes, depth = 1, .id = "method"))res_tables <- map_depth(res_list, 2, get_aggregated_rep_tables, depth = 1)
res_tables <- map(res_tables, ~ combine_tables(.x, .id = "method"))runtime_plots <- imap(
runtimes,
~ plot_run_times(.x, width = 0.2, height = 0) +
ggtitle(.y)
)pval_figs <- map(res_tables, ~ pval_hist(.x))non_de_res <- map2(sce_objects, res_tables, function(sce_list, res) {
by_rep <- split(res, res$replicate)
out <- map2(sce_list, by_rep, function(sce, tbl) {
## Select only non-DE genes
non_de <- rownames(sce)[!rowData(sce)$is_DE]
tbl[tbl$gene %in% non_de, ]
})
bind_rows(out, .id = "replicate")
})non_de_pval_figs <- map(non_de_res, ~ pval_hist(.x))iCOBRAP-values for missing genes are set to 1.
cobra_data <- map2(res_tables, sce_objects, function(res_table, sce_list) {
## Split up results per replicate
res_per_replicate <- split(res_table, res_table$replicate)
map2(res_per_replicate, sce_list, prepare_COBRAData, replace_missing = TRUE)
})
cobra_perf <- map_depth(cobra_data, 2, calculate_performance, binary_truth = "status")
n_methods <- length(unique(res_tables[[1]]$method)) + 1 # +1 for "truth"
cobra_objects <- map_depth(cobra_perf, 2, prepare_data_for_plot)no_legend <- theme(legend.position = "none")
tpr_plots <- map(cobra_objects, function(cobra_list) {
p <- imap(cobra_list, ~ plot_tpr(.x, title = .y) + no_legend)
patchwork::wrap_plots(p, ncol = 3)
})fpr_plots <- map(cobra_objects, function(cobra_list) {
p <- imap(cobra_list, ~ plot_fpr(.x, title = .y) + no_legend)
patchwork::wrap_plots(p, ncol = 3)
})fdr_tpr_plots <- map(cobra_objects, function(cobra_list) {
p <- imap(cobra_list, ~ plot_fdrtprcurve(.x, title = .y))
patchwork::wrap_plots(p, ncol = 3, guides = "collect")
})Same plots but zoomed on the FDR-TPR points region.
fdr_tpr_plots_zoomed <- map(cobra_objects, function(cobra_list) {
p <- imap(cobra_list, ~ plot_zoomed_fdrtprcurve(.x, title = .y))
patchwork::wrap_plots(p, ncol = 3, guides = "collect")
})#> [1] "2023-07-21 19:19:03 CEST"
#> ─ Session info ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.0 (2022-04-22)
#> os Red Hat Enterprise Linux 8.6 (Ootpa)
#> system x86_64, linux-gnu
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Europe/Paris
#> date 2023-07-21
#> pandoc 2.13 @ /apps/gent/RHEL8/zen2-ib/software/Pandoc/2.13/bin/ (via rmarkdown)
#>
#> ─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#> ! package * version date (UTC) lib source
#> P argparse * 2.2.2 2023-02-15 [?] CRAN (R 4.2.0)
#> P beachmat 2.14.2 2023-04-07 [?] Bioconductor
#> P beeswarm 0.4.0 2021-06-01 [?] CRAN (R 4.2.0)
#> P Biobase * 2.58.0 2022-11-01 [?] Bioconductor
#> P BiocGenerics * 0.44.0 2022-11-01 [?] Bioconductor
#> P BiocManager 1.30.20 2023-02-24 [?] CRAN (R 4.2.0)
#> P BiocNeighbors 1.16.0 2022-11-01 [?] Bioconductor
#> P BiocParallel 1.32.6 2023-03-17 [?] Bioconductor
#> P BiocSingular 1.14.0 2022-11-01 [?] Bioconductor
#> P bitops 1.0-7 2021-04-24 [?] CRAN (R 4.2.0)
#> P bluster 1.8.0 2022-11-01 [?] Bioconductor
#> P bslib 0.4.2 2022-12-16 [?] CRAN (R 4.2.0)
#> P cachem 1.0.8 2023-05-01 [?] CRAN (R 4.2.0)
#> P cli 3.6.1 2023-03-23 [?] CRAN (R 4.2.0)
#> P cluster 2.1.4 2022-08-22 [?] CRAN (R 4.2.0)
#> P codetools 0.2-19 2023-02-01 [?] CRAN (R 4.2.0)
#> P colorspace 2.1-0 2023-01-23 [?] CRAN (R 4.2.0)
#> P cowplot * 1.1.1 2020-12-30 [?] CRAN (R 4.2.0)
#> DDCompanion * 0.1.15 2023-05-22 [1] local (./package)
#> P DelayedArray 0.24.0 2022-11-01 [?] Bioconductor
#> P DelayedMatrixStats 1.20.0 2022-11-01 [?] Bioconductor
#> P digest 0.6.31 2022-12-11 [?] CRAN (R 4.2.0)
#> P dplyr * 1.1.2 2023-04-20 [?] CRAN (R 4.2.0)
#> P dqrng 0.3.0 2021-05-01 [?] CRAN (R 4.2.0)
#> P DT 0.28 2023-05-18 [?] CRAN (R 4.2.0)
#> P edgeR 3.40.2 2023-01-19 [?] Bioconductor
#> P ellipsis 0.3.2 2021-04-29 [?] CRAN (R 4.2.0)
#> P evaluate 0.21 2023-05-05 [?] CRAN (R 4.2.0)
#> P fansi 1.0.4 2023-01-22 [?] CRAN (R 4.2.0)
#> P farver 2.1.1 2022-07-06 [?] CRAN (R 4.2.0)
#> P fastmap 1.1.1 2023-02-24 [?] CRAN (R 4.2.0)
#> P findpython 1.0.8 2023-03-14 [?] CRAN (R 4.2.0)
#> P forcats * 1.0.0 2023-01-29 [?] CRAN (R 4.2.0)
#> P generics 0.1.3 2022-07-05 [?] CRAN (R 4.2.0)
#> P GenomeInfoDb * 1.34.9 2023-02-02 [?] Bioconductor
#> P GenomeInfoDbData 1.2.9 2023-05-20 [?] Bioconductor
#> P GenomicRanges * 1.50.2 2022-12-16 [?] Bioconductor
#> P GGally * 2.1.2 2021-06-21 [?] CRAN (R 4.2.0)
#> P ggbeeswarm 0.7.2 2023-04-29 [?] CRAN (R 4.2.0)
#> P ggplot2 * 3.4.2 2023-04-03 [?] CRAN (R 4.2.0)
#> P ggrepel 0.9.3 2023-02-03 [?] CRAN (R 4.2.0)
#> P glue 1.6.2 2022-02-24 [?] CRAN (R 4.2.0)
#> P gridExtra 2.3 2017-09-09 [?] CRAN (R 4.2.0)
#> P gtable 0.3.3 2023-03-21 [?] CRAN (R 4.2.0)
#> P here * 1.0.1 2020-12-13 [?] CRAN (R 4.2.0)
#> P highr 0.10 2022-12-22 [?] CRAN (R 4.2.0)
#> P hms 1.1.3 2023-03-21 [?] CRAN (R 4.2.0)
#> P htmltools 0.5.5 2023-03-23 [?] CRAN (R 4.2.0)
#> P htmlwidgets 1.6.2 2023-03-17 [?] CRAN (R 4.2.0)
#> P httpuv 1.6.11 2023-05-11 [?] CRAN (R 4.2.0)
#> P httr 1.4.6 2023-05-08 [?] CRAN (R 4.2.0)
#> P iCOBRA * 1.26.0 2022-11-01 [?] Bioconductor
#> P igraph 1.4.3 2023-05-22 [?] CRAN (R 4.2.0)
#> P IRanges * 2.32.0 2022-11-01 [?] Bioconductor
#> P irlba 2.3.5.1 2022-10-03 [?] CRAN (R 4.2.0)
#> P jquerylib 0.1.4 2021-04-26 [?] CRAN (R 4.2.0)
#> P jsonlite 1.8.4 2022-12-06 [?] CRAN (R 4.2.0)
#> P kableExtra * 1.3.4 2021-02-20 [?] CRAN (R 4.2.0)
#> P knitr 1.42 2023-01-25 [?] CRAN (R 4.2.0)
#> P labeling 0.4.2 2020-10-20 [?] CRAN (R 4.2.0)
#> P later 1.3.1 2023-05-02 [?] CRAN (R 4.2.0)
#> P lattice 0.21-8 2023-04-05 [?] CRAN (R 4.2.0)
#> P lifecycle 1.0.3 2022-10-07 [?] CRAN (R 4.2.0)
#> P limma 3.54.2 2023-02-28 [?] Bioconductor
#> P locfit 1.5-9.7 2023-01-02 [?] CRAN (R 4.2.0)
#> P lubridate * 1.9.2 2023-02-10 [?] CRAN (R 4.2.0)
#> P magrittr 2.0.3 2022-03-30 [?] CRAN (R 4.2.0)
#> P Matrix 1.5-4.1 2023-05-18 [?] CRAN (R 4.2.0)
#> P MatrixGenerics * 1.10.0 2022-11-01 [?] Bioconductor
#> P matrixStats * 0.63.0 2022-11-18 [?] CRAN (R 4.2.0)
#> P metapod 1.6.0 2022-11-01 [?] Bioconductor
#> P mime 0.12 2021-09-28 [?] CRAN (R 4.2.0)
#> P munsell 0.5.0 2018-06-12 [?] CRAN (R 4.2.0)
#> P patchwork 1.1.2 2022-08-19 [?] CRAN (R 4.2.0)
#> P pillar 1.9.0 2023-03-22 [?] CRAN (R 4.2.0)
#> P pkgconfig 2.0.3 2019-09-22 [?] CRAN (R 4.2.0)
#> P plyr 1.8.8 2022-11-11 [?] CRAN (R 4.2.0)
#> P promises 1.2.0.1 2021-02-11 [?] CRAN (R 4.2.0)
#> P purrr * 1.0.1 2023-01-10 [?] CRAN (R 4.2.0)
#> P R6 2.5.1 2021-08-19 [?] CRAN (R 4.2.0)
#> P RColorBrewer 1.1-3 2022-04-03 [?] CRAN (R 4.2.0)
#> P Rcpp 1.0.10 2023-01-22 [?] CRAN (R 4.2.0)
#> P RCurl 1.98-1.12 2023-03-27 [?] CRAN (R 4.2.0)
#> P readr * 2.1.4 2023-02-10 [?] CRAN (R 4.2.0)
#> renv 0.17.3 2023-04-06 [1] CRAN (R 4.2.0)
#> P reshape 0.8.9 2022-04-12 [?] CRAN (R 4.2.0)
#> P reshape2 1.4.4 2020-04-09 [?] CRAN (R 4.2.0)
#> P rlang 1.1.1 2023-04-28 [?] CRAN (R 4.2.0)
#> P rmarkdown 2.21 2023-03-26 [?] CRAN (R 4.2.0)
#> P ROCR 1.0-11 2020-05-02 [?] CRAN (R 4.2.0)
#> P rprojroot 2.0.3 2022-04-02 [?] CRAN (R 4.2.0)
#> P rstudioapi 0.14 2022-08-22 [?] CRAN (R 4.2.0)
#> P rsvd 1.0.5 2021-04-16 [?] CRAN (R 4.2.0)
#> P rvest 1.0.3 2022-08-19 [?] CRAN (R 4.2.0)
#> P S4Vectors * 0.36.2 2023-02-26 [?] Bioconductor
#> P sass 0.4.6 2023-05-03 [?] CRAN (R 4.2.0)
#> P ScaledMatrix 1.6.0 2022-11-01 [?] Bioconductor
#> P scales 1.2.1 2022-08-20 [?] CRAN (R 4.2.0)
#> P scater * 1.26.1 2022-11-13 [?] Bioconductor
#> P scran * 1.26.2 2023-01-19 [?] Bioconductor
#> P scuttle * 1.8.4 2023-01-19 [?] Bioconductor
#> P sessioninfo 1.2.2 2021-12-06 [?] CRAN (R 4.2.0)
#> P shiny 1.7.4 2022-12-15 [?] CRAN (R 4.2.0)
#> P shinyBS 0.61.1 2022-04-17 [?] CRAN (R 4.2.0)
#> P shinydashboard 0.7.2 2021-09-30 [?] CRAN (R 4.2.0)
#> P SingleCellExperiment * 1.20.1 2023-03-17 [?] Bioconductor
#> P sparseMatrixStats 1.10.0 2022-11-01 [?] Bioconductor
#> P statmod 1.5.0 2023-01-06 [?] CRAN (R 4.2.0)
#> P stringi 1.7.12 2023-01-11 [?] CRAN (R 4.2.0)
#> P stringr * 1.5.0 2022-12-02 [?] CRAN (R 4.2.0)
#> P SummarizedExperiment * 1.28.0 2022-11-01 [?] Bioconductor
#> P svglite 2.1.1 2023-01-10 [?] CRAN (R 4.2.0)
#> P systemfonts 1.0.4 2022-02-11 [?] CRAN (R 4.2.0)
#> P tibble * 3.2.1 2023-03-20 [?] CRAN (R 4.2.0)
#> P tidyr * 1.3.0 2023-01-24 [?] CRAN (R 4.2.0)
#> P tidyselect 1.2.0 2022-10-10 [?] CRAN (R 4.2.0)
#> P tidyverse * 2.0.0 2023-02-22 [?] CRAN (R 4.2.0)
#> P timechange 0.2.0 2023-01-11 [?] CRAN (R 4.2.0)
#> P tzdb 0.4.0 2023-05-12 [?] CRAN (R 4.2.0)
#> P UpSetR 1.4.0 2019-05-22 [?] CRAN (R 4.2.0)
#> P utf8 1.2.3 2023-01-31 [?] CRAN (R 4.2.0)
#> P vctrs 0.6.2 2023-04-19 [?] CRAN (R 4.2.0)
#> P vipor 0.4.5 2017-03-22 [?] CRAN (R 4.2.0)
#> P viridis 0.6.3 2023-05-03 [?] CRAN (R 4.2.0)
#> P viridisLite 0.4.2 2023-05-02 [?] CRAN (R 4.2.0)
#> P webshot 0.5.4 2022-09-26 [?] CRAN (R 4.2.0)
#> P withr 2.5.0 2022-03-03 [?] CRAN (R 4.2.0)
#> P xfun 0.39 2023-04-20 [?] CRAN (R 4.2.0)
#> P xml2 1.3.4 2023-04-27 [?] CRAN (R 4.2.0)
#> P xtable 1.8-4 2019-04-21 [?] CRAN (R 4.2.0)
#> P XVector 0.38.0 2022-11-01 [?] Bioconductor
#> P yaml 2.3.7 2023-01-23 [?] CRAN (R 4.2.0)
#> P zlibbioc 1.44.0 2022-11-01 [?] Bioconductor
#>
#> [1] /kyukon/data/gent/vo/000/gvo00063/Jeroen/DD_project_v2/benchmarks/covid/renv/library/R-4.2/x86_64-pc-linux-gnu
#> [2] /kyukon/home/gent/460/vsc46052/.cache/R/renv/sandbox/R-4.2/x86_64-pc-linux-gnu/4df86545
#>
#> P ── Loaded and on-disk path mismatch.
#>
#> ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────